no_warns ?= n
xen_ia64_expose_p2m ?= y
xen_ia64_pervcpu_vhpt ?= y
+xen_ia64_tlb_track ?= y
+xen_ia64_tlb_track_cnt ?= n
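+# xen_ia64_tlb_track: remember which vTLB/VHPT insertions were made from a
+# p2m entry so that unmapping a page can flush only those entries instead
+# of the whole vTLB (see tlb_track.c).
+# xen_ia64_tlb_track_cnt: additionally count hits per tracking entry.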
ifneq ($(COMPILE_ARCH),$(TARGET_ARCH))
CROSS_COMPILE ?= /usr/local/sp_env/v2.2.5/i686/bin/ia64-unknown-linux-
ifeq ($(xen_ia64_pervcpu_vhpt),y)
CFLAGS += -DCONFIG_XEN_IA64_PERVCPU_VHPT
endif
+ifeq ($(xen_ia64_tlb_track),y)
+CFLAGS += -DCONFIG_XEN_IA64_TLB_TRACK
+endif
+ifeq ($(xen_ia64_tlb_track_cnt),y)
+CFLAGS += -DCONFIG_TLB_TRACK_CNT
+endif
ifeq ($(no_warns),y)
CFLAGS += -Wa,--fatal-warnings -Werror -Wno-uninitialized
endif
obj-y += xencomm.o
obj-$(crash_debug) += gdbstub.o
+obj-$(xen_ia64_tlb_track) += tlb_track.o
#include <asm/dom_fw.h>
#include <asm/shadow.h>
#include <xen/guest_access.h>
+#include <asm/tlb_track.h>
unsigned long dom0_size = 512*1024*1024;
unsigned long dom0_align = 64*1024*1024;
DPRINTK("%s:%d domain %d pervcpu_vhpt %d\n",
__func__, __LINE__, d->domain_id, d->arch.has_pervcpu_vhpt);
#endif
+ if (tlb_track_create(d) < 0)
+ goto fail_nomem1;
d->shared_info = alloc_xenheap_pages(get_order_from_shift(XSI_SHIFT));
if (d->shared_info == NULL)
goto fail_nomem;
return 0;
fail_nomem:
+ tlb_track_destroy(d);
+fail_nomem1:
if (d->arch.mm.pgd != NULL)
pgd_free(d->arch.mm.pgd);
if (d->shared_info != NULL)
if (d->arch.shadow_bitmap != NULL)
xfree(d->arch.shadow_bitmap);
+ tlb_track_destroy(d);
+
/* Clear vTLB for the next domain. */
domain_flush_tlb_vhpt(d);
#include <asm/asm-xsi-offsets.h>
#include <asm/shadow.h>
#include <asm/uaccess.h>
+#include <asm/p2m_entry.h>
extern void die_if_kernel(char *str, struct pt_regs *regs, long err);
/* FIXME: where should these declarations be? */
fault = vcpu_translate(current,address,is_data,&pteval,&itir,&iha);
if (fault == IA64_NO_FAULT || fault == IA64_USE_TLB) {
struct p2m_entry entry;
- pteval = translate_domain_pte(pteval, address, itir, &logps, &entry);
- vcpu_itc_no_srlz(current,is_data?2:1,address,pteval,-1UL,logps);
+ unsigned long m_pteval;
+ m_pteval = translate_domain_pte(pteval, address, itir,
+ &logps, &entry);
+ vcpu_itc_no_srlz(current, (is_data? 2: 1) | 4,
+ address, m_pteval, pteval, logps, &entry);
if ((fault == IA64_USE_TLB && !current->arch.dtlb.pte.p) ||
p2m_entry_retry(&entry)) {
/* dtlb has been purged in-between. This dtlb was
#include <asm/vhpt.h>
#include <asm/vcpu.h>
#include <asm/shadow.h>
+#include <asm/p2m_entry.h>
+#include <asm/tlb_track.h>
#include <linux/efi.h>
#include <xen/guest_access.h>
#include <asm/page.h>
#include <public/memory.h>
static void domain_page_flush(struct domain* d, unsigned long mpaddr,
- unsigned long old_mfn, unsigned long new_mfn);
+ volatile pte_t* ptep, pte_t old_pte);
extern unsigned long ia64_iobase;
res |= flags & ASSIGN_readonly ? _PAGE_AR_R: _PAGE_AR_RWX;
res |= flags & ASSIGN_nocache ? _PAGE_MA_UC: _PAGE_MA_WB;
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+ res |= flags & ASSIGN_tlb_track ? _PAGE_TLB_TRACKING: 0;
+#endif
return res;
}
/* map a physical address to the specified metaphysical addr */
-// flags: currently only ASSIGN_readonly, ASSIGN_nocache
+// flags: currently only ASSIGN_readonly, ASSIGN_nocache, ASSIGN_tlb_track
// This is called by assign_domain_mmio_page().
// So accessing the pte is racy.
int
// The caller must call set_gpfn_from_mfn() beforehand if necessary,
// because the set_gpfn_from_mfn() result must be visible before the pte
// xchg; the caller must use a memory barrier. NOTE: xchg has acquire semantics.
-// flags: currently only ASSIGN_readonly
+// flags: ASSIGN_xxx
static void
assign_domain_page_replace(struct domain *d, unsigned long mpaddr,
unsigned long mfn, unsigned long flags)
set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY);
}
- domain_page_flush(d, mpaddr, old_mfn, mfn);
+ domain_page_flush(d, mpaddr, pte, old_pte);
try_to_clear_PGC_allocate(d, old_page);
put_page(old_page);
struct mm_struct *mm = &d->arch.mm;
volatile pte_t* pte;
unsigned long old_mfn;
- unsigned long old_arflags;
+ unsigned long old_prot;
pte_t old_pte;
unsigned long new_mfn;
unsigned long new_prot;
pte = lookup_alloc_domain_pte(d, mpaddr);
again:
- old_arflags = pte_val(*pte) & ~_PAGE_PPN_MASK;
+ old_prot = pte_val(*pte) & ~_PAGE_PPN_MASK;
old_mfn = page_to_mfn(old_page);
- old_pte = pfn_pte(old_mfn, __pgprot(old_arflags));
+ old_pte = pfn_pte(old_mfn, __pgprot(old_prot));
if (!pte_present(old_pte)) {
- DPRINTK("%s: old_pte 0x%lx old_arflags 0x%lx old_mfn 0x%lx\n",
- __func__, pte_val(old_pte), old_arflags, old_mfn);
+ DPRINTK("%s: old_pte 0x%lx old_prot 0x%lx old_mfn 0x%lx\n",
+ __func__, pte_val(old_pte), old_prot, old_mfn);
return -EINVAL;
}
goto again;
}
- DPRINTK("%s: old_pte 0x%lx old_arflags 0x%lx old_mfn 0x%lx "
+ DPRINTK("%s: old_pte 0x%lx old_prot 0x%lx old_mfn 0x%lx "
"ret_pte 0x%lx ret_mfn 0x%lx\n",
__func__,
- pte_val(old_pte), old_arflags, old_mfn,
+ pte_val(old_pte), old_prot, old_mfn,
pte_val(ret_pte), pte_pfn(ret_pte));
return -EINVAL;
}
set_gpfn_from_mfn(old_mfn, INVALID_M2P_ENTRY);
- domain_page_flush(d, mpaddr, old_mfn, new_mfn);
+ domain_page_flush(d, mpaddr, pte, old_pte);
put_page(old_page);
perfc_incrc(assign_domain_pge_cmpxchg_rel);
return 0;
set_gpfn_from_mfn(mfn, INVALID_M2P_ENTRY);
}
- domain_page_flush(d, mpaddr, mfn, INVALID_MFN);
+ domain_page_flush(d, mpaddr, pte, old_pte);
if (page_get_owner(page) != NULL) {
try_to_clear_PGC_allocate(d, page);
BUG_ON(ret == 0);
BUG_ON(page_get_owner(mfn_to_page(mfn)) == d &&
get_gpfn_from_mfn(mfn) != INVALID_M2P_ENTRY);
- assign_domain_page_replace(d, gpaddr, mfn, (flags & GNTMAP_readonly)?
- ASSIGN_readonly: ASSIGN_writable);
+ assign_domain_page_replace(d, gpaddr, mfn,
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+ ASSIGN_tlb_track |
+#endif
+ ((flags & GNTMAP_readonly) ?
+ ASSIGN_readonly : ASSIGN_writable));
perfc_incrc(create_grant_host_mapping);
return GNTST_okay;
}
}
BUG_ON(pte_pfn(old_pte) != mfn);
- domain_page_flush(d, gpaddr, mfn, INVALID_MFN);
+ domain_page_flush(d, gpaddr, pte, old_pte);
page = mfn_to_page(mfn);
BUG_ON(page_get_owner(page) == d); // try_to_clear_PGC_allocate(d, page) is not needed.
// flush finer range.
static void
domain_page_flush(struct domain* d, unsigned long mpaddr,
- unsigned long old_mfn, unsigned long new_mfn)
+ volatile pte_t* ptep, pte_t old_pte)
{
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+ struct tlb_track_entry* entry;
+#endif
+
if (shadow_mode_enabled(d))
shadow_mark_page_dirty(d, mpaddr >> PAGE_SHIFT);
+#ifndef CONFIG_XEN_IA64_TLB_TRACK
domain_flush_vtlb_all();
+#else
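+    /*
+     * tlb_track_search_and_remove() tells us how precisely the insertions
+     * made from this pte were tracked:
+     *   NOT_TRACKED: the pte was not marked for tracking; full vTLB flush.
+     *   NOT_FOUND:   tracked, but nothing was inserted; nothing to flush.
+     *   FOUND:       exactly one insertion; flush only that entry.
+     *   MANY:        several insertions; fall back to a full flush.
+     *   AGAIN:       never returned by the search side, hence BUG().
+     */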
+ switch (tlb_track_search_and_remove(d->arch.tlb_track,
+ ptep, old_pte, &entry)) {
+ case TLB_TRACK_NOT_TRACKED:
+ // DPRINTK("%s TLB_TRACK_NOT_TRACKED\n", __func__);
+ domain_flush_vtlb_all();
+ break;
+ case TLB_TRACK_NOT_FOUND:
+ /* do nothing */
+ // DPRINTK("%s TLB_TRACK_NOT_FOUND\n", __func__);
+ break;
+ case TLB_TRACK_FOUND:
+ // DPRINTK("%s TLB_TRACK_FOUND\n", __func__);
+ domain_flush_vtlb_track_entry(d, entry);
+ tlb_track_free_entry(d->arch.tlb_track, entry);
+ break;
+ case TLB_TRACK_MANY:
+ DPRINTK("%s TLB_TRACK_MANY\n", __func__);
+ domain_flush_vtlb_all();
+ break;
+ case TLB_TRACK_AGAIN:
+ DPRINTK("%s TLB_TRACK_AGAIN\n", __func__);
+ BUG();
+ break;
+ }
+#endif
perfc_incrc(domain_page_flush);
}
--- /dev/null
+/******************************************************************************
+ * tlb_track.c
+ *
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#include <asm/tlb_track.h>
+#include <asm/p2m_entry.h>
+#include <asm/vmx_mm_def.h> /* for IA64_RR_SHIFT */
+#include <asm/vmx_vcpu.h> /* for VRN7 */
+#include <asm/vcpu.h> /* for PSCB() */
+
+#define CONFIG_TLB_TRACK_DEBUG
+#ifdef CONFIG_TLB_TRACK_DEBUG
+# define tlb_track_printd(fmt, ...) \
+ printf("%s:%d " fmt, __func__, __LINE__, ##__VA_ARGS__)
+#else
+# define tlb_track_printd(fmt, ...) do { } while (0)
+#endif
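+
+/*
+ * Overview: each domain keeps a hash table (struct tlb_track) keyed by the
+ * p2m pte pointer.  When a guest translation backed by a tracked pte is
+ * inserted, tlb_track_insert_or_dirty() records (ptep, pte, vaddr, rid)
+ * together with the dirty pcpu/vcpu masks.  When the p2m entry is zapped,
+ * tlb_track_search_and_remove() hands that record back so that only the
+ * recorded virtual address needs to be flushed.
+ */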
+
+static int
+tlb_track_allocate_entries(struct tlb_track* tlb_track)
+{
+ struct page_info* entry_page;
+ struct tlb_track_entry* track_entries;
+ unsigned int allocated;
+ unsigned long i;
+
+ BUG_ON(tlb_track->num_free > 0);
+ if (tlb_track->num_entries >= tlb_track->limit) {
+ DPRINTK("%s: num_entries %d limit %d\n",
+ __func__, tlb_track->num_entries, tlb_track->limit);
+ return -ENOMEM;
+ }
+ entry_page = alloc_domheap_page(NULL);
+ if (entry_page == NULL) {
+ DPRINTK("%s: domheap page failed. num_entries %d limit %d\n",
+ __func__, tlb_track->num_entries, tlb_track->limit);
+ return -ENOMEM;
+ }
+
+ list_add(&entry_page->list, &tlb_track->page_list);
+ track_entries = (struct tlb_track_entry*)page_to_virt(entry_page);
+ allocated = PAGE_SIZE / sizeof(track_entries[0]);
+ tlb_track->num_entries += allocated;
+ tlb_track->num_free += allocated;
+ for (i = 0; i < allocated; i++) {
+ list_add(&track_entries[i].list, &tlb_track->free_list);
+ // tlb_track_printd("track_entries[%ld] 0x%p\n", i, &track_entries[i]);
+ }
+ tlb_track_printd("allocated %d num_entries %d num_free %d\n",
+ allocated, tlb_track->num_entries, tlb_track->num_free);
+ return 0;
+}
+
+
+int
+tlb_track_create(struct domain* d)
+{
+ struct tlb_track* tlb_track = NULL;
+ struct page_info* hash_page = NULL;
+ unsigned int hash_size;
+ unsigned int hash_shift;
+ unsigned int i;
+
+ tlb_track = xmalloc(struct tlb_track);
+ if (tlb_track == NULL)
+ goto out;
+
+ hash_page = alloc_domheap_page(NULL);
+ if (hash_page == NULL)
+ goto out;
+
+ spin_lock_init(&tlb_track->free_list_lock);
+ INIT_LIST_HEAD(&tlb_track->free_list);
+ tlb_track->limit = TLB_TRACK_LIMIT_ENTRIES;
+ tlb_track->num_entries = 0;
+ tlb_track->num_free = 0;
+ INIT_LIST_HEAD(&tlb_track->page_list);
+ if (tlb_track_allocate_entries(tlb_track) < 0)
+ goto out;
+
+ spin_lock_init(&tlb_track->hash_lock);
+ /* XXX hash size optimization */
+ hash_size = PAGE_SIZE / sizeof(tlb_track->hash[0]);
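+    /* Rounding the bucket count down to a power of two keeps hash_mask
+       valid below; when the count is already a power of two this settles
+       on half the list heads that fit in one page. */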
+ for (hash_shift = 0; (1 << (hash_shift + 1)) < hash_size; hash_shift++)
+ /* nothing */;
+ tlb_track->hash_size = (1 << hash_shift);
+ tlb_track->hash_shift = hash_shift;
+ tlb_track->hash_mask = (1 << hash_shift) - 1;
+ tlb_track->hash = page_to_virt(hash_page);
+ for (i = 0; i < tlb_track->hash_size; i++)
+ INIT_LIST_HEAD(&tlb_track->hash[i]);
+
+ smp_mb(); /* make initialization visible before use. */
+ d->arch.tlb_track = tlb_track;
+ printk("%s:%d hash 0x%p hash_size %d \n",
+ __func__, __LINE__, tlb_track->hash, tlb_track->hash_size);
+
+ return 0;
+
+out:
+ if (hash_page != NULL)
+ free_domheap_page(hash_page);
+
+ if (tlb_track != NULL)
+ xfree(tlb_track);
+
+ return -ENOMEM;
+}
+
+void
+tlb_track_destroy(struct domain* d)
+{
+ struct tlb_track* tlb_track = d->arch.tlb_track;
+ struct page_info* page;
+ struct page_info* next;
+
+ spin_lock(&tlb_track->free_list_lock);
+ BUG_ON(tlb_track->num_free != tlb_track->num_entries);
+
+ list_for_each_entry_safe(page, next, &tlb_track->page_list, list) {
+ list_del(&page->list);
+ free_domheap_page(page);
+ }
+
+ free_domheap_page(virt_to_page(tlb_track->hash));
+ xfree(tlb_track);
+    // d->arch.tlb_track = NULL;
+}
+
+static struct tlb_track_entry*
+tlb_track_get_entry(struct tlb_track* tlb_track)
+{
+ struct tlb_track_entry* entry = NULL;
+ spin_lock(&tlb_track->free_list_lock);
+ if (tlb_track->num_free == 0)
+ (void)tlb_track_allocate_entries(tlb_track);
+
+ if (tlb_track->num_free > 0) {
+ BUG_ON(list_empty(&tlb_track->free_list));
+ entry = list_entry(tlb_track->free_list.next,
+ struct tlb_track_entry, list);
+ tlb_track->num_free--;
+ list_del(&entry->list);
+ }
+ spin_unlock(&tlb_track->free_list_lock);
+ return entry;
+}
+
+void
+tlb_track_free_entry(struct tlb_track* tlb_track,
+ struct tlb_track_entry* entry)
+{
+ spin_lock(&tlb_track->free_list_lock);
+ list_add(&entry->list, &tlb_track->free_list);
+ tlb_track->num_free++;
+ spin_unlock(&tlb_track->free_list_lock);
+}
+
+
+#include <linux/hash.h>
+/* XXX hash function. */
+static struct list_head*
+tlb_track_hash_head(struct tlb_track* tlb_track, volatile pte_t* ptep)
+{
+ unsigned long hash = hash_long((unsigned long)ptep, tlb_track->hash_shift);
+ BUG_ON(hash >= tlb_track->hash_size);
+ BUG_ON((hash & tlb_track->hash_mask) != hash);
+ return &tlb_track->hash[hash];
+}
+
+static int
+tlb_track_pte_zapped(pte_t old_pte, pte_t ret_pte)
+{
+ if (pte_pfn(old_pte) != pte_pfn(ret_pte) ||
+ (pte_val(old_pte) & ~(_PFN_MASK | _PAGE_TLB_TRACK_MASK)) !=
+ (pte_val(ret_pte) & ~(_PFN_MASK | _PAGE_TLB_TRACK_MASK))) {
+ /* Other thread zapped the p2m entry. */
+ return 1;
+ }
+ return 0;
+}
+
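+/*
+ * Record a tlb insertion made from a tracked p2m entry.  The first
+ * insertion sets _PAGE_TLB_INSERTED on the pte (via cmpxchg) and hashes a
+ * tlb_track_entry; a later insertion with a different (vaddr, rid) sets
+ * _PAGE_TLB_INSERTED_MANY and unhashes the entry, degrading this pte to
+ * full-flush behaviour.  A lost cmpxchg race yields TLB_TRACK_AGAIN and
+ * the caller forces a retry via p2m_entry_set_retry().
+ */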
+static TLB_TRACK_RET_T
+tlb_track_insert_or_dirty(struct tlb_track* tlb_track, struct mm_struct* mm,
+ volatile pte_t* ptep, pte_t old_pte,
+ unsigned long vaddr, unsigned long rid)
+{
+ unsigned long mfn = pte_pfn(old_pte);
+ struct list_head* head = tlb_track_hash_head(tlb_track, ptep);
+ struct tlb_track_entry* entry;
+ struct tlb_track_entry* new_entry = NULL;
+ unsigned long bit_to_be_set = _PAGE_TLB_INSERTED;
+ pte_t new_pte;
+ pte_t ret_pte;
+
+ struct vcpu* v = current;
+ TLB_TRACK_RET_T ret = TLB_TRACK_NOT_FOUND;
+
+#if 0 /* this is done at vcpu_tlb_track_insert_or_dirty() */
+ perfc_incrc(tlb_track_iod);
+ if (!pte_tlb_tracking(old_pte)) {
+ perfc_incrc(tlb_track_iod_not_tracked);
+ return TLB_TRACK_NOT_TRACKED;
+ }
+#endif
+ if (pte_tlb_inserted_many(old_pte)) {
+ perfc_incrc(tlb_track_iod_tracked_many);
+ return TLB_TRACK_MANY;
+ }
+
+ /* vaddr must be normalized so that it is in vrn7 and page aligned. */
+ BUG_ON((vaddr >> IA64_RR_SHIFT) != VRN7);
+ BUG_ON((vaddr & ~PAGE_MASK) != 0);
+#if 0
+ tlb_track_printd("\n"
+ "\tmfn 0x%016lx\n"
+ "\told_pte 0x%016lx ptep 0x%p\n"
+ "\tptep_val 0x%016lx vaddr 0x%016lx rid %ld\n"
+ "\ttlb_track 0x%p head 0x%p\n",
+ mfn,
+ pte_val(old_pte), ptep, pte_val(*ptep),
+ vaddr, rid,
+ tlb_track, head);
+#endif
+
+ again:
+ /*
+     * The zapping side may zap the p2m entry and then remove the tlb track
+     * entry non-atomically, so we may see a stale tlb track entry here.
+     * p2m_entry_retry() handles such a case.
+     * Alternatively, another thread may zap the p2m entry, remove the tlb
+     * track entry and insert a new one.
+ */
+ spin_lock(&tlb_track->hash_lock);
+ list_for_each_entry(entry, head, list) {
+ if (entry->ptep != ptep)
+ continue;
+
+ if (pte_pfn(entry->pte_val) == mfn) {
+ // tlb_track_entry_printf(entry);
+ if (entry->vaddr == vaddr && entry->rid == rid) {
+ // tlb_track_printd("TLB_TRACK_FOUND\n");
+ ret = TLB_TRACK_FOUND;
+ perfc_incrc(tlb_track_iod_found);
+#ifdef CONFIG_TLB_TRACK_CNT
+ entry->cnt++;
+ if (entry->cnt > TLB_TRACK_CNT_FORCE_MANY) {
+ /*
+                     * heuristics:
+                     * A page used to transfer data over a device channel
+                     * is normally unmapped shortly after the real device
+                     * I/O completes, having seen only one or two tlb
+                     * inserts. This page, however, has been accessed many
+                     * times, so we guess that it backs an I/O ring and
+                     * that tracking this entry is useless.
+ */
+ // tlb_track_entry_printf(entry);
+ // tlb_track_printd("cnt = %ld\n", entry->cnt);
+ perfc_incrc(tlb_track_iod_force_many);
+ goto force_many;
+ }
+#endif
+ goto found;
+ } else {
+#ifdef CONFIG_TLB_TRACK_CNT
+ force_many:
+#endif
+ if (!pte_tlb_inserted(old_pte)) {
+ printk("%s:%d racy update\n", __func__, __LINE__);
+ old_pte = __pte(pte_val(old_pte) | _PAGE_TLB_INSERTED);
+ }
+ new_pte = __pte(pte_val(old_pte) | _PAGE_TLB_INSERTED_MANY);
+ ret_pte = ptep_cmpxchg_rel(mm, vaddr, ptep, old_pte, new_pte);
+ if (pte_val(ret_pte) != pte_val(old_pte)) {
+ // tlb_track_printd("TLB_TRACK_AGAIN\n");
+ ret = TLB_TRACK_AGAIN;
+ perfc_incrc(tlb_track_iod_again);
+ } else {
+ // tlb_track_printd("TLB_TRACK_MANY del entry 0x%p\n",
+ // entry);
+ ret = TLB_TRACK_MANY;
+ list_del(&entry->list);
+ // tlb_track_entry_printf(entry);
+ perfc_incrc(tlb_track_iod_tracked_many_del);
+ }
+ goto out;
+ }
+ }
+
+ /*
+     * Another thread changed the p2m entry, removed the old tlb track
+     * entry and inserted a new one after we read old_pte but before we
+     * took the spinlock.
+ */
+ // tlb_track_printd("TLB_TRACK_AGAIN\n");
+ ret = TLB_TRACK_AGAIN;
+ perfc_incrc(tlb_track_iod_again);
+ goto out;
+ }
+
+    entry = NULL; // no match was found; don't free a stale pointer at out.
+ if (pte_tlb_inserted(old_pte)) {
+        /* Another thread removed the tlb_track_entry after we read old_pte
+           but before we took the spinlock. */
+ ret = TLB_TRACK_AGAIN;
+ perfc_incrc(tlb_track_iod_again);
+ goto out;
+ }
+ if (new_entry == NULL && bit_to_be_set == _PAGE_TLB_INSERTED) {
+ spin_unlock(&tlb_track->hash_lock);
+ new_entry = tlb_track_get_entry(tlb_track);
+ if (new_entry == NULL) {
+ tlb_track_printd("get_entry failed\n");
+            /* No entry can be allocated;
+               fall back to full flush mode. */
+ bit_to_be_set |= _PAGE_TLB_INSERTED_MANY;
+ perfc_incrc(tlb_track_iod_new_failed);
+ }
+ // tlb_track_printd("new_entry 0x%p\n", new_entry);
+ perfc_incrc(tlb_track_iod_new_entry);
+ goto again;
+ }
+
+ BUG_ON(pte_tlb_inserted_many(old_pte));
+ new_pte = __pte(pte_val(old_pte) | bit_to_be_set);
+ ret_pte = ptep_cmpxchg_rel(mm, vaddr, ptep, old_pte, new_pte);
+ if (pte_val(old_pte) != pte_val(ret_pte)) {
+ if (tlb_track_pte_zapped(old_pte, ret_pte)) {
+ // tlb_track_printd("zapped TLB_TRACK_AGAIN\n");
+ ret = TLB_TRACK_AGAIN;
+ perfc_incrc(tlb_track_iod_again);
+ goto out;
+ }
+
+ /* Other thread set _PAGE_TLB_INSERTED and/or _PAGE_TLB_INSERTED_MANY */
+ if (pte_tlb_inserted_many(ret_pte)) {
+            /* Another thread already set _PAGE_TLB_INSERTED_MANY and
+               removed the entry. */
+            // tlb_track_printd("inserted TLB_TRACK_MANY\n");
+ BUG_ON(!pte_tlb_inserted(ret_pte));
+ ret = TLB_TRACK_MANY;
+ perfc_incrc(tlb_track_iod_new_many);
+ goto out;
+ }
+ BUG_ON(pte_tlb_inserted(ret_pte));
+ BUG();
+ }
+ if (new_entry) {
+ // tlb_track_printd("iserting new_entry 0x%p\n", new_entry);
+ entry = new_entry;
+ new_entry = NULL;
+
+ entry->ptep = ptep;
+ entry->pte_val = old_pte;
+ entry->vaddr = vaddr;
+ entry->rid = rid;
+ cpus_clear(entry->pcpu_dirty_mask);
+ vcpus_clear(entry->vcpu_dirty_mask);
+ list_add(&entry->list, head);
+
+#ifdef CONFIG_TLB_TRACK_CNT
+ entry->cnt = 0;
+#endif
+ perfc_incrc(tlb_track_iod_insert);
+ // tlb_track_entry_printf(entry);
+ } else {
+ goto out;
+ }
+
+ found:
+ BUG_ON(v->processor >= NR_CPUS);
+ cpu_set(v->processor, entry->pcpu_dirty_mask);
+    BUG_ON(v->vcpu_id >= MAX_VIRT_CPUS);
+ vcpu_set(v->vcpu_id, entry->vcpu_dirty_mask);
+ perfc_incrc(tlb_track_iod_dirtied);
+
+ out:
+ spin_unlock(&tlb_track->hash_lock);
+ if (ret == TLB_TRACK_MANY && entry != NULL)
+ tlb_track_free_entry(tlb_track, entry);
+ if (new_entry != NULL)
+ tlb_track_free_entry(tlb_track, new_entry);
+ return ret;
+}
+
+void
+__vcpu_tlb_track_insert_or_dirty(struct vcpu *vcpu, unsigned long vaddr,
+ struct p2m_entry* entry)
+{
+ unsigned long vrn = vaddr >> IA64_RR_SHIFT;
+ unsigned long rid = PSCB(vcpu, rrs[vrn]);
+ TLB_TRACK_RET_T ret;
+
+    /* Normalize the address into vrn7.
+       For Linux dom0, vrn7 is the most common case. */
+ vaddr |= VRN7 << VRN_SHIFT;
+ vaddr &= PAGE_MASK;
+ ret = tlb_track_insert_or_dirty(vcpu->domain->arch.tlb_track,
+ &vcpu->domain->arch.mm,
+ entry->ptep, entry->used,
+ vaddr, rid);
+ if (ret == TLB_TRACK_AGAIN)
+ p2m_entry_set_retry(entry);
+}
+
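+/*
+ * Remove the tracking entry for a pte that is about to be zapped.  On
+ * TLB_TRACK_FOUND the entry has been unhashed and ownership passes to the
+ * caller, who must free it with tlb_track_free_entry() once the flush is
+ * done (cf. domain_page_flush()).
+ */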
+TLB_TRACK_RET_T
+tlb_track_search_and_remove(struct tlb_track* tlb_track,
+ volatile pte_t* ptep, pte_t old_pte,
+ struct tlb_track_entry** entryp)
+{
+ unsigned long mfn = pte_pfn(old_pte);
+ struct list_head* head = tlb_track_hash_head(tlb_track, ptep);
+ struct tlb_track_entry* entry;
+
+ perfc_incrc(tlb_track_sar);
+ if (!pte_tlb_tracking(old_pte)) {
+ perfc_incrc(tlb_track_sar_not_tracked);
+ return TLB_TRACK_NOT_TRACKED;
+ }
+ if (!pte_tlb_inserted(old_pte)) {
+ BUG_ON(pte_tlb_inserted_many(old_pte));
+ perfc_incrc(tlb_track_sar_not_found);
+ return TLB_TRACK_NOT_FOUND;
+ }
+ if (pte_tlb_inserted_many(old_pte)) {
+ BUG_ON(!pte_tlb_inserted(old_pte));
+ perfc_incrc(tlb_track_sar_many);
+ return TLB_TRACK_MANY;
+ }
+
+ spin_lock(&tlb_track->hash_lock);
+ list_for_each_entry(entry, head, list) {
+ if (entry->ptep != ptep)
+ continue;
+
+ if (pte_pfn(entry->pte_val) == mfn) {
+ list_del(&entry->list);
+ spin_unlock(&tlb_track->hash_lock);
+ *entryp = entry;
+ perfc_incrc(tlb_track_sar_found);
+ // tlb_track_entry_printf(entry);
+#ifdef CONFIG_TLB_TRACK_CNT
+ // tlb_track_printd("cnt = %ld\n", entry->cnt);
+#endif
+ return TLB_TRACK_FOUND;
+ }
+ BUG();
+ }
+ BUG();
+ spin_unlock(&tlb_track->hash_lock);
+ return TLB_TRACK_NOT_TRACKED;
+}
+
+/* for debug */
+void
+__tlb_track_entry_printf(const char* func, int line,
+ const struct tlb_track_entry* entry)
+{
+ char pcpumask_buf[NR_CPUS + 1];
+ char vcpumask_buf[MAX_VIRT_CPUS + 1];
+ cpumask_scnprintf(pcpumask_buf, sizeof(pcpumask_buf),
+ entry->pcpu_dirty_mask);
+ vcpumask_scnprintf(vcpumask_buf, sizeof(vcpumask_buf),
+ entry->vcpu_dirty_mask);
+ printk("%s:%d\n"
+ "\tmfn 0x%016lx\n"
+ "\told_pte 0x%016lx ptep 0x%p\n"
+ "\tpte_val 0x%016lx vaddr 0x%016lx rid %ld\n"
+ "\tpcpu_dirty_mask %s vcpu_dirty_mask %s\n"
+ "\tentry 0x%p\n",
+ func, line,
+ pte_pfn(entry->pte_val),
+ pte_val(entry->pte_val), entry->ptep, pte_val(*entry->ptep),
+ entry->vaddr, entry->rid,
+ pcpumask_buf, vcpumask_buf,
+ entry);
+}
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
#include <asm/bundle.h>
#include <asm/privop_stat.h>
#include <asm/uaccess.h>
+#include <asm/p2m_entry.h>
+#include <asm/tlb_track.h>
/* FIXME: where should these declarations be? */
extern void getreg(unsigned long regnum, unsigned long *val, int *nat, struct pt_regs *regs);
VCPU translation cache access routines
**************************************************************************/
-void vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte, UINT64 mp_pte, UINT64 logps)
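+/*
+ * IorD is a bit mask: 0x1 = instruction tlb, 0x2 = data tlb, 0x4 = don't
+ * install the mapping in the 1-entry TLB.  "entry" carries the p2m
+ * snapshot so that the insertion can be recorded for tlb tracking.
+ */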
+void
+vcpu_itc_no_srlz(VCPU *vcpu, UINT64 IorD, UINT64 vaddr, UINT64 pte,
+ UINT64 mp_pte, UINT64 logps, struct p2m_entry* entry)
{
unsigned long psr;
unsigned long ps = (vcpu->domain==dom0) ? logps : PAGE_SHIFT;
"smaller page size!\n");
BUG_ON(logps > PAGE_SHIFT);
+ vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry);
psr = ia64_clear_ic();
ia64_itc(IorD,vaddr,pte,ps); // FIXME: look for bigger mappings
ia64_set_psr(psr);
// PAGE_SIZE mapping in the vhpt for now, else purging is complicated
else vhpt_insert(vaddr,pte,PAGE_SHIFT<<2);
#endif
- if ((mp_pte == -1UL) || (IorD & 0x4)) // don't place in 1-entry TLB
+ if (IorD & 0x4) /* don't place in 1-entry TLB */
return;
if (IorD & 0x1) {
vcpu_set_tr_entry(&PSCBX(vcpu,itlb),mp_pte,ps<<2,vaddr);
pteval = translate_domain_pte(pte, ifa, itir, &logps, &entry);
if (!pteval) return IA64_ILLOP_FAULT;
if (swap_rr0) set_one_rr(0x0,PSCB(vcpu,rrs[0]));
- vcpu_itc_no_srlz(vcpu,2,ifa,pteval,pte,logps);
+ vcpu_itc_no_srlz(vcpu, 2, ifa, pteval, pte, logps, &entry);
if (swap_rr0) set_metaphysical_rr0();
if (p2m_entry_retry(&entry)) {
vcpu_flush_tlb_vhpt_range(ifa, logps);
pteval = translate_domain_pte(pte, ifa, itir, &logps, &entry);
if (!pteval) return IA64_ILLOP_FAULT;
if (swap_rr0) set_one_rr(0x0,PSCB(vcpu,rrs[0]));
- vcpu_itc_no_srlz(vcpu, 1,ifa,pteval,pte,logps);
+ vcpu_itc_no_srlz(vcpu, 1, ifa, pteval, pte, logps, &entry);
if (swap_rr0) set_metaphysical_rr0();
if (p2m_entry_retry(&entry)) {
vcpu_flush_tlb_vhpt_range(ifa, logps);
#include <asm/page.h>
#include <asm/vhpt.h>
#include <asm/vcpu.h>
+#include <asm/vcpumask.h>
#include <asm/vmmu.h>
/* Defined in tlb.c */
local_vhpt_flush(void)
{
__vhpt_flush(__ia64_per_cpu_var(vhpt_paddr));
+ perfc_incrc(local_vhpt_flush);
}
static void
vcpu_vhpt_flush(struct vcpu* v)
{
__vhpt_flush(vcpu_vhpt_maddr(v));
+ perfc_incrc(vcpu_vhpt_flush);
}
static void
}
#endif
+void
+domain_purge_swtc_entries(struct domain *d)
+{
+ struct vcpu* v;
+ for_each_vcpu(d, v) {
+ if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
+ continue;
+
+ /* Purge TC entries.
+ FIXME: clear only if match. */
+ vcpu_purge_tr_entry(&PSCBX(v,dtlb));
+ vcpu_purge_tr_entry(&PSCBX(v,itlb));
+ }
+}
+
+void
+domain_purge_swtc_entries_vcpu_dirty_mask(struct domain* d,
+ vcpumask_t vcpu_dirty_mask)
+{
+ int vcpu;
+
+ for_each_vcpu_mask(vcpu, vcpu_dirty_mask) {
+ struct vcpu* v = d->vcpu[vcpu];
+ if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
+ continue;
+
+ /* Purge TC entries.
+ FIXME: clear only if match. */
+ vcpu_purge_tr_entry(&PSCBX(v, dtlb));
+ vcpu_purge_tr_entry(&PSCBX(v, itlb));
+ }
+}
+
// SMP: we can't assume v == current, vcpu might move to another physical cpu.
// So memory barrier is necessary.
// if we could guarantee that the vcpu runs on only this physical cpu
}
#endif
- for_each_vcpu (d, v) {
- if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
- continue;
-
- /* Purge TC entries.
- FIXME: clear only if match. */
- vcpu_purge_tr_entry(&PSCBX(v,dtlb));
- vcpu_purge_tr_entry(&PSCBX(v,itlb));
- }
+ domain_purge_swtc_entries(d);
smp_mb();
for_each_vcpu (d, v) {
perfc_incrc(domain_flush_vtlb_range);
}
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+#include <asm/tlb_track.h>
+#include <asm/vmx_vcpu.h>
+void
+__domain_flush_vtlb_track_entry(struct domain* d,
+ const struct tlb_track_entry* entry)
+{
+ unsigned long rr7_rid;
+ int swap_rr0 = 0;
+ unsigned long old_rid;
+ unsigned long vaddr = entry->vaddr;
+ struct vcpu* v;
+ int cpu;
+ int vcpu;
+
+ BUG_ON((vaddr >> VRN_SHIFT) != VRN7);
+ /*
+ * heuristic:
+     * dom0 Linux accesses grant-mapped pages via the kernel's
+     * straight-mapped area without changing the rr7 rid.
+     * So rr7 == entry->rid is likely and we can usually avoid
+     * a rid change.
+ * When blktap is supported, this heuristic should be revised.
+ */
+ vcpu_get_rr(current, VRN7 << VRN_SHIFT, &rr7_rid);
+ if (likely(rr7_rid == entry->rid)) {
+ perfc_incrc(tlb_track_use_rr7);
+ } else {
+ swap_rr0 = 1;
+        vaddr = (vaddr << 3) >> 3; // force vrn0
+ perfc_incrc(tlb_track_swap_rr0);
+ }
+
+ // tlb_track_entry_printf(entry);
+ if (swap_rr0) {
+ vcpu_get_rr(current, 0, &old_rid);
+ vcpu_set_rr(current, 0, entry->rid);
+ }
+
+ if (HAS_PERVCPU_VHPT(d)) {
+ for_each_vcpu_mask(vcpu, entry->vcpu_dirty_mask) {
+ v = d->vcpu[vcpu];
+ if (!test_bit(_VCPUF_initialised, &v->vcpu_flags))
+ continue;
+
+ /* Invalidate VHPT entries. */
+ vcpu_flush_vhpt_range(v, vaddr, PAGE_SIZE);
+ }
+ } else {
+ for_each_cpu_mask(cpu, entry->pcpu_dirty_mask) {
+ /* Invalidate VHPT entries. */
+ cpu_flush_vhpt_range(cpu, vaddr, PAGE_SIZE);
+ }
+ }
+    /* ptc.ga (which has release semantics). */
+ ia64_global_tlb_purge(vaddr, vaddr + PAGE_SIZE, PAGE_SHIFT);
+
+ if (swap_rr0) {
+ vcpu_set_rr(current, 0, old_rid);
+ }
+ perfc_incrc(domain_flush_vtlb_track_entry);
+}
+
+void
+domain_flush_vtlb_track_entry(struct domain* d,
+ const struct tlb_track_entry* entry)
+{
+ domain_purge_swtc_entries_vcpu_dirty_mask(d, entry->vcpu_dirty_mask);
+ smp_mb();
+
+ __domain_flush_vtlb_track_entry(d, entry);
+}
+
+#endif
+
static void flush_tlb_vhpt_all (struct domain *d)
{
/* First VHPT. */
#include <asm/fpswa.h>
#include <xen/rangeset.h>
-struct p2m_entry {
- volatile pte_t* pte;
- pte_t used;
-};
-
-static inline void
-p2m_entry_set(struct p2m_entry* entry, volatile pte_t* pte, pte_t used)
-{
- entry->pte = pte;
- entry->used = used;
-}
-
-static inline int
-p2m_entry_retry(struct p2m_entry* entry)
-{
- //XXX see lookup_domain_pte().
- // NULL is set for invalid gpaddr for the time being.
- if (entry->pte == NULL)
- return 0;
-
- return (pte_val(*entry->pte) != pte_val(entry->used));
-}
+struct p2m_entry;
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+struct tlb_track;
+#endif
extern void domain_relinquish_resources(struct domain *);
struct vcpu;
struct last_vcpu last_vcpu[NR_CPUS];
struct arch_vmx_domain arch_vmx; /* Virtual Machine Extensions */
+
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+ struct tlb_track* tlb_track;
+#endif
};
#define INT_ENABLE_OFFSET(v) \
(sizeof(vcpu_info_t) * (v)->vcpu_id + \
#define _PAGE_VIRT_D (__IA64_UL(1) << 53) /* Virtual dirty bit */
#define _PAGE_PROTNONE 0
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
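+/*
+ * TLB tracking state lives in pte bits ignored by the hardware (bit 53 and
+ * above; _PAGE_VIRT_D above is another such bit):
+ *   _PAGE_TLB_TRACKING:      the pte is eligible for tracking (set from
+ *                            ASSIGN_tlb_track when the page is assigned).
+ *   _PAGE_TLB_INSERTED:      one tlb insertion has been recorded.
+ *   _PAGE_TLB_INSERTED_MANY: several insertions; per-entry flush gave up.
+ */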
+#define _PAGE_TLB_TRACKING_BIT 54
+#define _PAGE_TLB_INSERTED_BIT 55
+#define _PAGE_TLB_INSERTED_MANY_BIT 56
+
+#define _PAGE_TLB_TRACKING (1UL << _PAGE_TLB_TRACKING_BIT)
+#define _PAGE_TLB_INSERTED (1UL << _PAGE_TLB_INSERTED_BIT)
+#define _PAGE_TLB_INSERTED_MANY (1UL << _PAGE_TLB_INSERTED_MANY_BIT)
+#define _PAGE_TLB_TRACK_MASK (_PAGE_TLB_TRACKING | \
+ _PAGE_TLB_INSERTED | \
+ _PAGE_TLB_INSERTED_MANY)
+
+#define pte_tlb_tracking(pte) \
+ ((pte_val(pte) & _PAGE_TLB_TRACKING) != 0)
+#define pte_tlb_inserted(pte) \
+ ((pte_val(pte) & _PAGE_TLB_INSERTED) != 0)
+#define pte_tlb_inserted_many(pte) \
+ ((pte_val(pte) & _PAGE_TLB_INSERTED_MANY) != 0)
+#endif // CONFIG_XEN_IA64_TLB_TRACK
+
/* domVTI */
#define GPFN_MEM (0UL << 60) /* Guest pfn is normal mem */
#define GPFN_FRAME_BUFFER (1UL << 60) /* VGA framebuffer */
--- /dev/null
+/******************************************************************************
+ * p2m_entry.h
+ *
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef __ASM_P2M_ENTRY_H__
+#define __ASM_P2M_ENTRY_H__
+
+#include <asm/pgtable.h>
+
+struct p2m_entry {
+#define P2M_PTE_ALWAYS_RETRY ((volatile pte_t*) -1)
+ volatile pte_t* ptep;
+ pte_t used;
+};
+
+static inline void
+p2m_entry_set(struct p2m_entry* entry, volatile pte_t* ptep, pte_t used)
+{
+ entry->ptep = ptep;
+ entry->used = used;
+}
+
+static inline void
+p2m_entry_set_retry(struct p2m_entry* entry)
+{
+ entry->ptep = P2M_PTE_ALWAYS_RETRY;
+}
+
+static inline int
+p2m_entry_retry(struct p2m_entry* entry)
+{
+ /* XXX see lookup_domain_pte().
+ NULL is set for invalid gpaddr for the time being. */
+ if (entry->ptep == NULL)
+ return 0;
+
+ if (entry->ptep == P2M_PTE_ALWAYS_RETRY)
+ return 1;
+
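+    /* The tlb tracking bits are flipped concurrently by
+       tlb_track_insert_or_dirty(); a change in those bits alone does not
+       invalidate the translation this snapshot describes, so mask them
+       out of the comparison. */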
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+ return ((pte_val(*entry->ptep) & ~_PAGE_TLB_TRACK_MASK) !=
+ (pte_val(entry->used) & ~_PAGE_TLB_TRACK_MASK));
+#else
+ return (pte_val(*entry->ptep) != pte_val(entry->used));
+#endif
+}
+
+#endif // __ASM_P2M_ENTRY_H__
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
#endif
// vhpt.c
+PERFCOUNTER_CPU(local_vhpt_flush, "local_vhpt_flush")
+PERFCOUNTER_CPU(vcpu_vhpt_flush, "vcpu_vhpt_flush")
PERFCOUNTER_CPU(vcpu_flush_vtlb_all, "vcpu_flush_vtlb_all")
PERFCOUNTER_CPU(domain_flush_vtlb_all, "domain_flush_vtlb_all")
PERFCOUNTER_CPU(vcpu_flush_tlb_vhpt_range, "vcpu_flush_tlb_vhpt_range")
+PERFCOUNTER_CPU(domain_flush_vtlb_track_entry, "domain_flush_vtlb_track_entry")
PERFCOUNTER_CPU(domain_flush_vtlb_range, "domain_flush_vtlb_range")
// domain.c
// dom0vp
PERFCOUNTER_CPU(dom0vp_phystomach, "dom0vp_phystomach")
PERFCOUNTER_CPU(dom0vp_machtophys, "dom0vp_machtophys")
+
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+// insert or dirty
+PERFCOUNTER_CPU(tlb_track_iod, "tlb_track_iod")
+PERFCOUNTER_CPU(tlb_track_iod_again, "tlb_track_iod_again")
+PERFCOUNTER_CPU(tlb_track_iod_not_tracked, "tlb_track_iod_not_tracked")
+PERFCOUNTER_CPU(tlb_track_iod_force_many, "tlb_track_iod_force_many")
+PERFCOUNTER_CPU(tlb_track_iod_tracked_many, "tlb_track_iod_tracked_many")
+PERFCOUNTER_CPU(tlb_track_iod_tracked_many_del, "tlb_track_iod_tracked_many_del")
+PERFCOUNTER_CPU(tlb_track_iod_found, "tlb_track_iod_found")
+PERFCOUNTER_CPU(tlb_track_iod_new_entry, "tlb_track_iod_new_entry")
+PERFCOUNTER_CPU(tlb_track_iod_new_failed, "tlb_track_iod_new_failed")
+PERFCOUNTER_CPU(tlb_track_iod_new_many, "tlb_track_iod_new_many")
+PERFCOUNTER_CPU(tlb_track_iod_insert, "tlb_track_iod_insert")
+PERFCOUNTER_CPU(tlb_track_iod_dirtied, "tlb_track_iod_dirtied")
+
+// search and remove
+PERFCOUNTER_CPU(tlb_track_sar, "tlb_track_sar")
+PERFCOUNTER_CPU(tlb_track_sar_not_tracked, "tlb_track_sar_not_tracked")
+PERFCOUNTER_CPU(tlb_track_sar_not_found, "tlb_track_sar_not_found")
+PERFCOUNTER_CPU(tlb_track_sar_found, "tlb_track_sar_found")
+PERFCOUNTER_CPU(tlb_track_sar_many, "tlb_track_sar_many")
+
+// flush
+PERFCOUNTER_CPU(tlb_track_use_rr7, "tlb_track_use_rr7")
+PERFCOUNTER_CPU(tlb_track_swap_rr0, "tlb_track_swap_rr0")
+#endif
--- /dev/null
+/******************************************************************************
+ * tlb_track.h
+ *
+ * Copyright (c) 2006 Isaku Yamahata <yamahata at valinux co jp>
+ * VA Linux Systems Japan K.K.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
+ *
+ */
+
+#ifndef __TLB_TRACK_H__
+#define __TLB_TRACK_H__
+
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+
+#include <xen/sched.h>
+#include <xen/perfc.h>
+#include <asm/domain.h>
+#include <xen/list.h>
+#include <asm/p2m_entry.h>
+#include <asm/vcpumask.h>
+
+// TODO: compact this structure.
+struct tlb_track_entry {
+ struct list_head list;
+
+ volatile pte_t* ptep; // corresponding p2m entry
+
+ /* XXX should we use TR_ENTRY? */
+ pte_t pte_val; // mfn and other flags
+                   // pte_val.p = 1:
+                   //   a tlb entry has been inserted.
+                   // pte_val.p = 0:
+                   //   a tlb entry was once inserted (hence
+                   //   this entry exists), but a tlb purge
+                   //   has since been issued, so this
+                   //   virtual address no longer needs to
+                   //   be purged.
+ unsigned long vaddr; // virtual address
+ unsigned long rid; // rid
+
+ cpumask_t pcpu_dirty_mask;
+ vcpumask_t vcpu_dirty_mask;
+ // tlbflush_timestamp;
+
+#ifdef CONFIG_TLB_TRACK_CNT
+#define TLB_TRACK_CNT_FORCE_MANY 256 /* XXX how many? */
+ unsigned long cnt;
+#endif
+};
+
+struct tlb_track {
+
+/* See __gnttab_map_grant_ref().
+   A domain can have up to MAPTRACK_MAX_ENTRIES granted pages mapped. */
+#define TLB_TRACK_LIMIT_ENTRIES \
+ (MAPTRACK_MAX_ENTRIES * (PAGE_SIZE / sizeof(struct tlb_track)))
+
+ spinlock_t free_list_lock;
+ struct list_head free_list;
+ unsigned int limit;
+ unsigned int num_entries;
+ unsigned int num_free;
+ struct list_head page_list;
+
+ /* XXX hash table size */
+ spinlock_t hash_lock;
+ unsigned int hash_size;
+ unsigned int hash_shift;
+ unsigned int hash_mask;
+ struct list_head* hash;
+};
+
+int tlb_track_create(struct domain* d);
+void tlb_track_destroy(struct domain* d);
+
+void tlb_track_free_entry(struct tlb_track* tlb_track,
+ struct tlb_track_entry* entry);
+
+void
+__vcpu_tlb_track_insert_or_dirty(struct vcpu *vcpu, unsigned long vaddr,
+ struct p2m_entry* entry);
+static inline void
+vcpu_tlb_track_insert_or_dirty(struct vcpu *vcpu, unsigned long vaddr,
+ struct p2m_entry* entry)
+{
+    /* Optimization:
+       a non-tracking pte is the most common case. */
+ perfc_incrc(tlb_track_iod);
+ if (!pte_tlb_tracking(entry->used)) {
+ perfc_incrc(tlb_track_iod_not_tracked);
+ return;
+ }
+
+ __vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry);
+}
+
+
+/* Return values of the tlb track functions. Only TLB_TRACK_FOUND hands a
+ * tracking entry back to the caller (via *entryp in
+ * tlb_track_search_and_remove()).
+ */
+enum TLB_TRACK_RET {
+ TLB_TRACK_NOT_TRACKED,
+ TLB_TRACK_NOT_FOUND,
+ TLB_TRACK_FOUND,
+ TLB_TRACK_MANY,
+ TLB_TRACK_AGAIN,
+};
+typedef enum TLB_TRACK_RET TLB_TRACK_RET_T;
+
+TLB_TRACK_RET_T
+tlb_track_search_and_remove(struct tlb_track* tlb_track,
+ volatile pte_t* ptep, pte_t old_pte,
+ struct tlb_track_entry** entryp);
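+/*
+ * A typical caller pattern, sketched after domain_page_flush():
+ *
+ *     struct tlb_track_entry* entry;
+ *     switch (tlb_track_search_and_remove(tlb_track, ptep, old_pte,
+ *                                         &entry)) {
+ *     case TLB_TRACK_FOUND:
+ *         domain_flush_vtlb_track_entry(d, entry);
+ *         tlb_track_free_entry(tlb_track, entry);
+ *         break;
+ *     case TLB_TRACK_NOT_FOUND:
+ *         break;                    // nothing was inserted from this pte
+ *     case TLB_TRACK_NOT_TRACKED:
+ *     case TLB_TRACK_MANY:
+ *         domain_flush_vtlb_all();  // no precise record; flush everything
+ *         break;
+ *     case TLB_TRACK_AGAIN:
+ *         BUG();                    // not returned by the search side
+ *     }
+ */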
+
+void
+__tlb_track_entry_printf(const char* func, int line,
+ const struct tlb_track_entry* entry);
+#define tlb_track_entry_printf(entry) \
+ __tlb_track_entry_printf(__func__, __LINE__, (entry))
+#else
+
+#define tlb_track_create(d) (0)
+#define tlb_track_destroy(d) do { } while (0)
+#define vcpu_tlb_track_insert_or_dirty(vcpu, vaddr, entry) do { } while (0)
+
+#endif /* CONFIG_XEN_IA64_TLB_TRACK */
+
+#endif /* __TLB_TRACK_H__ */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
/* Global range-flush of vTLB. */
void domain_flush_vtlb_range (struct domain *d, u64 vadr, u64 addr_range);
+#ifdef CONFIG_XEN_IA64_TLB_TRACK
+struct tlb_track_entry;
+void __domain_flush_vtlb_track_entry(struct domain* d,
+ const struct tlb_track_entry* entry);
+/* Global entry-flush of vTLB */
+void domain_flush_vtlb_track_entry(struct domain* d,
+ const struct tlb_track_entry* entry);
+#endif
+
/* Flush vhpt and mTLB on every dirty cpus. */
void domain_flush_tlb_vhpt(struct domain *d);
extern void vcpu_set_next_timer(VCPU *vcpu);
extern BOOLEAN vcpu_timer_expired(VCPU *vcpu);
extern UINT64 vcpu_deliverable_interrupts(VCPU *vcpu);
-extern void vcpu_itc_no_srlz(VCPU *vcpu, UINT64, UINT64, UINT64, UINT64, UINT64);
+struct p2m_entry;
+extern void vcpu_itc_no_srlz(VCPU *vcpu, UINT64, UINT64, UINT64, UINT64, UINT64, struct p2m_entry*);
extern UINT64 vcpu_get_tmp(VCPU *, UINT64);
extern void vcpu_set_tmp(VCPU *, UINT64, UINT64);
--- /dev/null
+#ifndef __XEN_VCPUMASK_H
+#define __XEN_VCPUMASK_H
+
+/* vcpu mask
+ stolen from cpumask.h */
+typedef struct { DECLARE_BITMAP(bits, MAX_VIRT_CPUS); } vcpumask_t;
+
+#define vcpu_set(vcpu, dst) __vcpu_set((vcpu), &(dst))
+static inline void __vcpu_set(int vcpu, volatile vcpumask_t *dstp)
+{
+ set_bit(vcpu, dstp->bits);
+}
+#define vcpus_clear(dst) __vcpus_clear(&(dst), MAX_VIRT_CPUS)
+static inline void __vcpus_clear(vcpumask_t *dstp, int nbits)
+{
+ bitmap_zero(dstp->bits, nbits);
+}
+/* No static inline type checking - see Subtlety (1) in cpumask.h. */
+#define vcpu_isset(vcpu, vcpumask) test_bit((vcpu), (vcpumask).bits)
+
+#define first_vcpu(src) __first_vcpu(&(src), MAX_VIRT_CPUS)
+static inline int __first_vcpu(const vcpumask_t *srcp, int nbits)
+{
+ return min_t(int, nbits, find_first_bit(srcp->bits, nbits));
+}
+
+#define next_vcpu(n, src) __next_vcpu((n), &(src), MAX_VIRT_CPUS)
+static inline int __next_vcpu(int n, const vcpumask_t *srcp, int nbits)
+{
+ return min_t(int, nbits, find_next_bit(srcp->bits, nbits, n+1));
+}
+
+#if MAX_VIRT_CPUS > 1
+#define for_each_vcpu_mask(vcpu, mask) \
+ for ((vcpu) = first_vcpu(mask); \
+ (vcpu) < MAX_VIRT_CPUS; \
+ (vcpu) = next_vcpu((vcpu), (mask)))
+#else /* MAX_VIRT_CPUS == 1 */
+#define for_each_vcpu_mask(vcpu, mask) for ((vcpu) = 0; (vcpu) < 1; (vcpu)++)
+#endif /* MAX_VIRT_CPUS */
+
+#define vcpumask_scnprintf(buf, len, src) \
+ __vcpumask_scnprintf((buf), (len), &(src), MAX_VIRT_CPUS)
+static inline int __vcpumask_scnprintf(char *buf, int len,
+ const vcpumask_t *srcp, int nbits)
+{
+ return bitmap_scnprintf(buf, len, srcp->bits, nbits);
+}
+
+#endif /* __XEN_VCPUMASK_H */
+
+/*
+ * Local variables:
+ * mode: C
+ * c-set-style: "BSD"
+ * c-basic-offset: 4
+ * tab-width: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
#ifndef __ASSEMBLY__
#include <xen/percpu.h>
+#include <asm/vcpumask.h>
+
+extern void domain_purge_swtc_entries(struct domain *d);
+extern void domain_purge_swtc_entries_vcpu_dirty_mask(struct domain* d, vcpumask_t vcpu_dirty_mask);
//
// VHPT Long Format Entry (as recognized by hw)
/* Internal only: memory attribute must be WC/UC/UCE. */
#define _ASSIGN_nocache 1
#define ASSIGN_nocache (1UL << _ASSIGN_nocache)
+// tlb tracking
+#define _ASSIGN_tlb_track 2
+#define ASSIGN_tlb_track (1UL << _ASSIGN_tlb_track)
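+// Passed by create_grant_host_mapping() (when CONFIG_XEN_IA64_TLB_TRACK
+// is enabled) so that grant-table mappings are tracked.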
/* This structure has the same layout of struct ia64_boot_param, defined in
<asm/system.h>. It is redefined here to ease use. */